In [0]:
# Copyright 2017 Google LLC.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [0]:
# Install deps
# We're using some new features from tensorflow so we install tf-nightly
!pip install -q tensor2tensor tf-nightly

In [0]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
import collections

from tensor2tensor import problems
from tensor2tensor.layers import common_layers
from tensor2tensor.utils import t2t_model
from tensor2tensor.utils import trainer_utils
from tensor2tensor.utils import registry
from tensor2tensor.utils import metrics

# Enable TF Eager execution
from tensorflow.contrib.eager.python import tfe
tfe.enable_eager_execution()

# Other setup
Modes = tf.estimator.ModeKeys

# Setup some directories
data_dir = os.path.expanduser("~/t2t/data")
tmp_dir = os.path.expanduser("~/t2t/tmp")
train_dir = os.path.expanduser("~/t2t/train")
checkpoint_dir = os.path.expanduser("~/t2t/checkpoints")
tf.gfile.MakeDirs(data_dir)
tf.gfile.MakeDirs(tmp_dir)
tf.gfile.MakeDirs(train_dir)
tf.gfile.MakeDirs(checkpoint_dir)
gs_data_dir = "gs://tensor2tensor-data"
gs_ckpt_dir = "gs://tensor2tensor-checkpoints/"

Download MNIST and inspect it


In [0]:
# A Problem is a dataset together with some fixed pre-processing.
# It could be a translation dataset with a specific tokenization,
# or an image dataset with a specific resolution.
#
# There are many problems available in Tensor2Tensor
problems.available()


Out[0]:
['algorithmic_addition_binary40',
 'algorithmic_addition_decimal40',
 'algorithmic_cipher_shift200',
 'algorithmic_cipher_shift5',
 'algorithmic_cipher_vigenere200',
 'algorithmic_cipher_vigenere5',
 'algorithmic_identity_binary40',
 'algorithmic_identity_decimal40',
 'algorithmic_multiplication_binary40',
 'algorithmic_multiplication_decimal40',
 'algorithmic_reverse_binary40',
 'algorithmic_reverse_binary40_test',
 'algorithmic_reverse_decimal40',
 'algorithmic_reverse_nlplike32k',
 'algorithmic_reverse_nlplike8k',
 'algorithmic_shift_decimal40',
 'audio_timit_characters_tune',
 'audio_timit_tokens8k_test',
 'audio_timit_tokens8k_tune',
 'image_celeba_tune',
 'image_cifar10',
 'image_cifar10_plain',
 'image_cifar10_plain8',
 'image_cifar10_tune',
 'image_fsns',
 'image_imagenet',
 'image_imagenet224',
 'image_imagenet32',
 'image_imagenet64',
 'image_mnist',
 'image_mnist_tune',
 'image_ms_coco_characters',
 'image_ms_coco_tokens32k',
 'image_ms_coco_tokens8k',
 'img2img_cifar10',
 'img2img_imagenet',
 'languagemodel_lm1b32k',
 'languagemodel_lm1b8k_packed',
 'languagemodel_lm1b_characters',
 'languagemodel_ptb10k',
 'languagemodel_ptb_characters',
 'languagemodel_wiki_full32k',
 'languagemodel_wiki_scramble128',
 'languagemodel_wiki_scramble1k50',
 'languagemodel_wiki_scramble8k50',
 'librispeech',
 'multinli_matched',
 'multinli_mismatched',
 'ocr_test',
 'parsing_english_ptb16k',
 'parsing_english_ptb8k',
 'parsing_icelandic16k',
 'programming_desc2code_cpp',
 'programming_desc2code_py',
 'sentiment_imdb',
 'summarize_cnn_dailymail32k',
 'translate_encs_wmt32k',
 'translate_encs_wmt_characters',
 'translate_ende_wmt32k',
 'translate_ende_wmt32k_packed',
 'translate_ende_wmt8k',
 'translate_ende_wmt_bpe32k',
 'translate_ende_wmt_characters',
 'translate_enfr_wmt32k',
 'translate_enfr_wmt32k_packed',
 'translate_enfr_wmt8k',
 'translate_enfr_wmt_characters',
 'translate_enfr_wmt_small32k',
 'translate_enfr_wmt_small8k',
 'translate_enfr_wmt_small_characters',
 'translate_enmk_setimes32k',
 'translate_enzh_wmt8k']

In [0]:
# Fetch the MNIST problem
mnist_problem = problems.problem("image_mnist")
# The generate_data method of a problem will download data and process it into
# a standard format ready for training and evaluation.
mnist_problem.generate_data(data_dir, tmp_dir)


INFO:tensorflow:Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /content/t2t/tmp/train-images-idx3-ubyte.gz
100% completed
INFO:tensorflow:Successfully downloaded train-images-idx3-ubyte.gz, 9912422 bytes.
INFO:tensorflow:Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /content/t2t/tmp/train-labels-idx1-ubyte.gz
113% completed
INFO:tensorflow:Successfully downloaded train-labels-idx1-ubyte.gz, 28881 bytes.
INFO:tensorflow:Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /content/t2t/tmp/t10k-images-idx3-ubyte.gz
100% completed
INFO:tensorflow:Successfully downloaded t10k-images-idx3-ubyte.gz, 1648877 bytes.
INFO:tensorflow:Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /content/t2t/tmp/t10k-labels-idx1-ubyte.gz
180% completed
INFO:tensorflow:Successfully downloaded t10k-labels-idx1-ubyte.gz, 4542 bytes.
INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-images-idx3-ubyte.gz
INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-labels-idx1-ubyte.gz
INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-images-idx3-ubyte.gz
INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-labels-idx1-ubyte.gz
INFO:tensorflow:Shuffling data...

In [0]:
# Now let's see the training MNIST data as Tensors.
mnist_example = tfe.Iterator(mnist_problem.dataset(Modes.TRAIN, data_dir)).next()
image = mnist_example["inputs"]
label = mnist_example["targets"]

plt.imshow(image.numpy()[:, :, 0].astype(np.float32), cmap=plt.get_cmap('gray'))
print("Label: %d" % label.numpy())


INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-train*
Label: 7

Translate from English to German with a pre-trained model


In [0]:
# Fetch the problem
ende_problem = problems.problem("translate_ende_wmt32k")

# Copy the vocab file locally so we can encode inputs and decode model outputs
# All vocabs are stored on GCS
vocab_file = os.path.join(gs_data_dir, "vocab.ende.32768")
!gsutil cp {vocab_file} {data_dir}

# Get the encoders from the problem
encoders = ende_problem.feature_encoders(data_dir)

# Setup helper functions for encoding and decoding
def encode(input_str, output_str=None):
  """Input str to features dict, ready for inference"""
  inputs = encoders["inputs"].encode(input_str) + [1]  # add EOS id
  batch_inputs = tf.reshape(inputs, [1, -1, 1])  # Make it 3D.
  return {"inputs": batch_inputs}

def decode(integers):
  """List of ints to str"""
  integers = list(np.squeeze(integers))
  if 1 in integers:
    integers = integers[:integers.index(1)]
  return encoders["inputs"].decode(np.squeeze(integers))



Updates are available for some Cloud SDK components.  To install them,
please run:
  $ gcloud components update

Copying gs://tensor2tensor-data/vocab.ende.32768...
/ [1 files][316.4 KiB/316.4 KiB]                                                
Operation completed over 1 objects/316.4 KiB.                                    

In [0]:
# # Generate and view the data
# # This cell is commented out because WMT data generation can take hours

# ende_problem.generate_data(data_dir, tmp_dir)
# example = tfe.Iterator(ende_problem.dataset(Modes.TRAIN, data_dir)).next()
# inputs = [int(x) for x in example["inputs"].numpy()] # Cast to ints.
# targets = [int(x) for x in example["targets"].numpy()] # Cast to ints.



# # Example inputs as int-tensor.
# print("Inputs, encoded:")
# print(inputs)
# print("Inputs, decoded:")
# # Example inputs as a sentence.
# print(decode(inputs))
# # Example targets as int-tensor.
# print("Targets, encoded:")
# print(targets)
# # Example targets as a sentence.
# print("Targets, decoded:")
# print(decode(targets))

In [0]:
# There are many models available in Tensor2Tensor
registry.list_models()


Out[0]:
['resnet50',
 'lstm_seq2seq',
 'transformer_encoder',
 'attention_lm',
 'vanilla_gan',
 'transformer',
 'gene_expression_conv',
 'transformer_moe',
 'attention_lm_moe',
 'transformer_revnet',
 'lstm_seq2seq_attention',
 'shake_shake',
 'transformer_ae',
 'diagonal_neural_gpu',
 'xception',
 'aligned',
 'multi_model',
 'neural_gpu',
 'slice_net',
 'byte_net',
 'cycle_gan',
 'transformer_sketch',
 'blue_net']

In [0]:
# Create hparams and the model
model_name = "transformer"
hparams_set = "transformer_base"

hparams = trainer_utils.create_hparams(hparams_set, data_dir)
trainer_utils.add_problem_hparams(hparams, "translate_ende_wmt32k")

# NOTE: Only create the model once when restoring from a checkpoint; it's a
# Layer and so subsequent instantiations will have different variable scopes
# that will not match the checkpoint.
translate_model = registry.model(model_name)(hparams, Modes.EVAL)

In [0]:
# Copy the pretrained checkpoint locally
ckpt_name = "transformer_ende_test"
gs_ckpt = os.path.join(gs_ckpt_dir, ckpt_name)
!gsutil -q cp -R {gs_ckpt} {checkpoint_dir}
ckpt_path = tf.train.latest_checkpoint(os.path.join(checkpoint_dir, ckpt_name))
ckpt_path


Out[0]:
u'/content/t2t/checkpoints/transformer_ende_test/model.ckpt-350855'

In [0]:
# Restore and translate!
def translate(inputs):
  encoded_inputs = encode(inputs)
  with tfe.restore_variables_on_create(ckpt_path):
    model_output = translate_model.infer(encoded_inputs)
  return decode(model_output)

inputs = "The animal didn't cross the street because it was too tired"
outputs = translate(inputs)

print("Inputs: %s" % inputs)
print("Outputs: %s" % outputs)


INFO:tensorflow:Greedy Decoding
Inputs: The animal didn't cross the street because it was too tired
Outputs: Das Tier überquerte die Straße nicht, weil es zu müde war, weil es zu müde war.

Attention Viz Utils


In [0]:
from tensor2tensor.visualization import attention
from tensor2tensor.data_generators import text_encoder

SIZE = 35

def encode_eval(input_str, output_str):
  inputs = tf.reshape(encoders["inputs"].encode(input_str) + [1], [1, -1, 1, 1])  # Make it 3D.
  outputs = tf.reshape(encoders["inputs"].encode(output_str) + [1], [1, -1, 1, 1])  # Make it 3D.
  return {"inputs": inputs, "targets": outputs}

def get_att_mats():
  enc_atts = []
  dec_atts = []
  encdec_atts = []

  for i in range(hparams.num_hidden_layers):
    enc_att = translate_model.attention_weights[
      "transformer/body/encoder/layer_%i/self_attention/multihead_attention/dot_product_attention" % i][0]
    dec_att = translate_model.attention_weights[
      "transformer/body/decoder/layer_%i/self_attention/multihead_attention/dot_product_attention" % i][0]
    encdec_att = translate_model.attention_weights[
      "transformer/body/decoder/layer_%i/encdec_attention/multihead_attention/dot_product_attention" % i][0]
    enc_atts.append(resize(enc_att))
    dec_atts.append(resize(dec_att))
    encdec_atts.append(resize(encdec_att))
  return enc_atts, dec_atts, encdec_atts

def resize(np_mat):
  # Sum across heads
  np_mat = np_mat[:, :SIZE, :SIZE]
  row_sums = np.sum(np_mat, axis=0)
  # Normalize
  layer_mat = np_mat / row_sums[np.newaxis, :]
  lsh = layer_mat.shape
  # Add extra dim for viz code to work.
  layer_mat = np.reshape(layer_mat, (1, lsh[0], lsh[1], lsh[2]))
  return layer_mat

def to_tokens(ids):
  ids = np.squeeze(ids)
  subtokenizer = hparams.problems[0].vocabulary['targets']
  tokens = []
  for _id in ids:
    if _id == 0:
      tokens.append('<PAD>')
    elif _id == 1:
      tokens.append('<EOS>')
    elif _id == -1:
      tokens.append('<NULL>')
    else:
        tokens.append(subtokenizer._subtoken_id_to_subtoken_string(_id))
  return tokens

In [0]:
def call_html():
  import IPython
  display(IPython.core.display.HTML('''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              "d3": "https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.8/d3.min",
              jquery: '//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min',
            },
          });
        </script>
        '''))

Display Attention


In [0]:
# Convert inputs and outputs to subwords
inp_text = to_tokens(encoders["inputs"].encode(inputs))
out_text = to_tokens(encoders["inputs"].encode(outputs))

# Run eval to collect attention weights
example = encode_eval(inputs, outputs)
with tfe.restore_variables_on_create(ckpt_path):
  translate_model.set_mode(Modes.EVAL)
  translate_model(example)
# Get normalized attention weights for each layer
enc_atts, dec_atts, encdec_atts = get_att_mats()

call_html()
attention.show(inp_text, out_text, enc_atts, dec_atts, encdec_atts)


WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensor2tensor/layers/common_layers.py:1671: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

Layer: Attention:

Train a custom model on MNIST


In [0]:
# Create your own model

class MySimpleModel(t2t_model.T2TModel):

  def model_fn_body(self, features):
    inputs = features["inputs"]
    filters = self.hparams.hidden_size
    h1 = tf.layers.conv2d(inputs, filters,
                          kernel_size=(5, 5), strides=(2, 2))
    h2 = tf.layers.conv2d(tf.nn.relu(h1), filters,
                          kernel_size=(5, 5), strides=(2, 2))
    return tf.layers.conv2d(tf.nn.relu(h2), filters,
                            kernel_size=(3, 3))

hparams = trainer_utils.create_hparams("basic_1", data_dir)
hparams.hidden_size = 64
trainer_utils.add_problem_hparams(hparams, "image_mnist")
model = MySimpleModel(hparams, Modes.TRAIN)

In [0]:
# Prepare for the training loop

# In Eager mode, opt.minimize must be passed a loss function wrapped with
# implicit_value_and_gradients
@tfe.implicit_value_and_gradients
def loss_fn(features):
  _, losses = model(features)
  return losses["training"]

# Setup the training data
BATCH_SIZE = 128
mnist_train_dataset = mnist_problem.dataset(Modes.TRAIN, data_dir)
mnist_train_dataset = mnist_train_dataset.repeat(None).batch(BATCH_SIZE)

optimizer = tf.train.AdamOptimizer()


INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-train*

In [0]:
# Train
NUM_STEPS = 500

for count, example in enumerate(tfe.Iterator(mnist_train_dataset)):
  example["targets"] = tf.reshape(example["targets"], [BATCH_SIZE, 1, 1, 1])  # Make it 4D.
  loss, gv = loss_fn(example)
  optimizer.apply_gradients(gv)

  if count % 50 == 0:
    print("Step: %d, Loss: %.3f" % (count, loss.numpy()))
  if count >= NUM_STEPS:
    break


Step: 0, Loss: 0.513
Step: 50, Loss: 0.342
Step: 100, Loss: 0.315
Step: 150, Loss: 0.372
Step: 200, Loss: 0.324
Step: 250, Loss: 0.271
Step: 300, Loss: 0.281
Step: 350, Loss: 0.285
Step: 400, Loss: 0.250
Step: 450, Loss: 0.247
Step: 500, Loss: 0.338

In [0]:
# This will eventually be available at
# tensor2tensor.metrics.create_eager_metrics
def create_eager_metrics(metric_names):
  """Create metrics accumulators and averager for Eager mode.

  Args:
    metric_names: list<str> from tensor2tensor.metrics.Metrics

  Returns:
    (accum_fn(predictions, targets) => None,
     result_fn() => dict<str metric_name, float avg_val>
  """
  metric_fns = dict(
      [(name, metrics.METRICS_FNS[name]) for name in metric_names])
  tfe_metrics = dict()

  for name in metric_names:
    tfe_metrics[name] = tfe.metrics.Mean(name=name)

  def metric_accum(predictions, targets):
    for name, metric_fn in metric_fns.items():
      val, weight = metric_fn(predictions, targets,
                              weights_fn=common_layers.weights_all)
      tfe_metrics[name](np.squeeze(val), np.squeeze(weight))

  def metric_means():
    avgs = {}
    for name in metric_names:
      avgs[name] = tfe_metrics[name].result().numpy()
    return avgs

  return metric_accum, metric_means

In [0]:
model.set_mode(Modes.EVAL)
mnist_eval_dataset = mnist_problem.dataset(Modes.EVAL, data_dir)

# Create eval metric accumulators for accuracy (ACC) and accuracy in
# top 5 (ACC_TOP5)
metrics_accum, metrics_result = create_eager_metrics(
    [metrics.Metrics.ACC, metrics.Metrics.ACC_TOP5])

for count, example in enumerate(tfe.Iterator(mnist_eval_dataset)):
  if count >= 200:
    break

  # Make the inputs and targets 4D
  example["inputs"] = tf.reshape(example["inputs"], [1, 28, 28, 3])
  example["targets"] = tf.reshape(example["targets"], [1, 1, 1, 1])

  # Call the model
  predictions, _ = model(example)

  # Compute and accumulate metrics
  metrics_accum(predictions, example["targets"])

# Print out the averaged metric values on the eval data
for name, val in metrics_result().items():
  print("%s: %.2f" % (name, val))


INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-dev*
accuracy_top5: 1.00
accuracy: 0.99